library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(RColorBrewer)
library(cowplot)
library(gt)
## Warning: package 'gt' was built under R version 4.0.3
theme_set(theme_minimal())

Objectives

Context

National:

Personal:

The Friends of the Earth Report - “England’s Green Space Gap”

Questions for Exploratory Data Analysis

Where is green space deprivation experienced?

Who is living in green space deprivation?

The relationship between green space deprivation and health?

Ideas:

Data sources used

data_sources <- tribble(
  ~file_name, ~name, ~notes, ~url,
  "(FOE) Green Space Consolidated Data - England - Version 2.1.xlsx",   "green_space", "...", "https://friendsoftheearth.uk/nature/green-space-consolidated-data-england",
  "Local_Authority_District_to_Region__December_2019__Lookup_in_England.csv",   "LAD_to_region", "used the December 2019 version", "https://geoportal.statistics.gov.uk/datasets/3ba3daf9278f47daba0f561889c3521a_0"
)

data_sources

Local Authority District to Region lookup (December 2019 used) - https://geoportal.statistics.gov.uk/datasets/3ba3daf9278f47daba0f561889c3521a_0

green_space <- read_excel("../Data/(FOE) Green Space Consolidated Data - England - Version 2.1.xlsx",
                          sheet = "MSOAs V2.1")
## New names:
## * `` -> ...1
green_space
# FOE green space data doesn't include a variable for the region each MSOA/local authority lies within
# So, import a look up table from the ONS, which maps between Local Authority Districts and Regions
LAD_to_region <- read_csv("../Data/Local_Authority_District_to_Region__December_2019__Lookup_in_England.csv") %>% 
  select(-FID, -LAD19NM)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   FID = col_double(),
##   LAD19CD = col_character(),
##   LAD19NM = col_character(),
##   RGN19CD = col_character(),
##   RGN19NM = col_character()
## )
LAD_to_region
# add region names and codes (from ONS look up) to FOE green space data
green_space <- green_space %>% 
  left_join(LAD_to_region, by = c("LA_Code" = "LAD19CD")) %>% 
  relocate(RGN19CD, RGN19NM, .before = Area) %>% 
  rename(region_code = RGN19CD,
         region_name = RGN19NM)

green_space

title_lab <- "Numbers of neighbourhoods in England by green space\ndeprivation rating\n"
subtitle_lab <- "E is highest level of green space deprivation\n\nA is lowest level of green space deprivation"
x_lab <-  "Green space deprivation rating\n\n"
y_lab <-  "\nNumber of Neighbourhoods\n"
caption_lab <-  "Source: Friends of the Earth, England's Green Space Gap"

msoa_by_rating <- green_space %>% 
  group_by(Green_Space_Deprivation_Rating) %>% 
  summarise(msoa_count = n()) %>% 
  add_column(bar_label = c("Large or very large gardens and large or \nvery large amounts of public green space",
                           "",
                           "",
                           "",
                           "Very small gardens and very small\namounts of public green space") # add labels to be used on the plot
             ) %>% 
  ungroup() %>% 
  mutate(prop_rating = msoa_count / sum(msoa_count))
## `summarise()` ungrouping output (override with `.groups` argument)
msoa_by_rating
p <- ggplot(data = msoa_by_rating,
            mapping = aes(x = Green_Space_Deprivation_Rating, y = msoa_count)
            )

p + geom_segment(mapping = aes(x = Green_Space_Deprivation_Rating,
                               xend = Green_Space_Deprivation_Rating,
                               y = 750,
                               yend = msoa_count),
                 colour = "grey50",
                 size = 1.5
                 ) +
  
  geom_point(mapping = aes(fill = Green_Space_Deprivation_Rating),
             size = 7.5,
             shape = 21, 
             colour = "grey50", 
             stroke = 1.25
             ) +
  
  scale_fill_brewer(type = "div",
                    palette = "RdYlGn",
                    direction = -1) +
  
  
  labs(title = title_lab,
       subtitle = subtitle_lab,
       x = x_lab,
       y = y_lab,
       caption = caption_lab) +
  
  guides(fill = FALSE) +
  
  # facet_wrap(~ Green_Space_Deprivation_Rating) +
  
  coord_flip()

The distribution of green space deprivation ratings across England

colour_palette <- scales::seq_gradient_pal(low = "palegreen4", high = "grey")
colour_scale <- colour_palette(seq(0, 1, length.out = 5))
#colour_scale <- c("springgreen4", "palegreen4", "darkseagreen3", "grey75", "grey60")

p + geom_bar(mapping = aes(fill = Green_Space_Deprivation_Rating),
             stat = "identity") +
  
  scale_fill_manual(values = colour_scale) +
  
  geom_text(mapping = aes(label = bar_label), hjust = 1.1, colour = "white", size = 3.5) +
  
  guides(fill = FALSE) +
  
  labs(title = title_lab,
       x = x_lab,
       y = y_lab,
       caption = caption_lab) +
  
  theme(axis.text.x = element_text(face = "bold"),
        axis.text.y = element_text(face = "bold", size = 15),
        plot.title = element_text(face = "bold", size = 15)
        ) +
  
  coord_flip()

Below I plot the proportions of the 6791 MSOAs analyzed given each green space deprivation rating.

p <- ggplot(data = msoa_by_rating,
            mapping = aes(x = Green_Space_Deprivation_Rating, y = prop_rating)
            )

p + geom_bar(mapping = aes(fill = Green_Space_Deprivation_Rating),
             stat = "identity") +
  
  scale_fill_manual(values = colour_scale) +
  
  geom_text(mapping = aes(label = bar_label), hjust = 1.1, colour = "white", size = 3.5) +
  
  guides(fill = FALSE) +
  
  labs(title = "Proportion of neighbourhoods in England by green space\ndeprivation rating\n",
       x = x_lab,
       y = "Proportion of neighbourhoods",
       caption = caption_lab) +
  
  theme(axis.text.x = element_text(face = "bold"),
        axis.text.y = element_text(face = "bold", size = 15),
        plot.title = element_text(face = "bold", size = 15)
        ) +
  
  coord_flip()

msoa_by_rating

The distribution of green space deprivation ratings across the regions

The two plots below, respectively show for each region the numbers or proportion of MSOA with each green space deprivation rating. Key insights from the two plots include:

msoa_by_region_and_rating <- green_space %>% 
  
  # count by region and rating
  group_by(region_name, Green_Space_Deprivation_Rating) %>% 
  summarise(msoa_count = n()) %>% 
  filter(!is.na(region_name)) %>% 
  ungroup() %>% 

  # calculate rating proportions by region
  group_by(region_name) %>% 
  mutate(prop = msoa_count / sum(msoa_count)) %>% 
  ungroup()
## `summarise()` regrouping output by 'region_name' (override with `.groups` argument)
msoa_by_region_and_rating
plot_msoas_by_region <-  function(y_var){
  
  p <- ggplot(data = msoa_by_region_and_rating,
            mapping = aes(x = Green_Space_Deprivation_Rating, y = .data[[y_var]])
            )

  p + geom_bar(mapping = aes(fill = Green_Space_Deprivation_Rating),
             stat = "identity",
             colour = "grey80") +
  
  scale_fill_brewer(type = "div",
                    palette = "RdYlGn",
                    direction = -1) +
  
  guides(fill = FALSE) +
  
  coord_flip() +
  
  facet_wrap(~ region_name, ncol = 3)
}

plot_msoas_by_region("msoa_count")

plot_msoas_by_region("prop")

Below I plot again plot the proportion of MSOAs within each region receiving each green space deprivation rating. This time faceting the plot by green space deprivation rating rather than region. This makes it easier to compare across regions at a given rating.

It would be easier to read if I produced separate plots for each green space deprivation rating, as then the regions could be put in rank order.

prop_a <- msoa_by_region_and_rating %>%
  filter(Green_Space_Deprivation_Rating == "A") %>% 
  mutate(prop_a = prop) %>% 
  select(region_name, prop_a)

msoa_by_region_and_rating <- msoa_by_region_and_rating %>% 
  left_join(prop_a)
## Joining, by = "region_name"
msoa_by_region_and_rating  
p <- ggplot(data = msoa_by_region_and_rating,
            mapping = aes(x = reorder(region_name, prop_a), y = prop, fill = region_name)
            )

p + geom_bar(stat = "identity") +
  facet_wrap(~ Green_Space_Deprivation_Rating) +
  
  guides(fill = FALSE) +
  
  coord_flip()

Next I explored an alternative appraoch to considering the distribution of msoas by rating and region. I plotted the proportion of MSOAs that received a specific rating by region. In order to address the ordering issue above, I produced one plot for each green space rating. This meant addressing the challenge of how to ensure the colour associated with a given region was applied consistently across the five plots. Here is where I found the an approach to doing this, using scale_…_manual.

How to map a colour to a value of a categorical variable …

This approach help me identify some additional insights:

# The idea is to produce one bar chart for each green space rating, and across these plots use the same colour for a given region. 
# choose a palette to work with
region_pal <- brewer.pal(9, "Paired")

# map the region names to a specific colour value (in a dataframe for ease)
region_colours_df <- msoa_by_region_and_rating %>% 
  select(region_name) %>% 
  distinct(region_name) %>% 
  add_column(colour = region_pal)

# convert the region-colour mapping from a dataframe to a vector
# because a vector is required by scale_fill_manual
region_colours_vector <- region_colours_df$colour
names(region_colours_vector) <- region_colours_df$region_name
region_colours_vector
##            East Midlands          East of England                   London 
##                "#A6CEE3"                "#1F78B4"                "#B2DF8A" 
##               North East               North West               South East 
##                "#33A02C"                "#FB9A99"                "#E31A1C" 
##               South West            West Midlands Yorkshire and The Humber 
##                "#FDBF6F"                "#FF7F00"                "#CAB2D6"
plot_gsdr_prop_by_region <- function(rating = "A"){
  
    msoa_by_region_and_rating %>%
      
    # focus on the rating of interest
    filter(Green_Space_Deprivation_Rating == rating) %>% 
    mutate(proportion = msoa_count / sum(msoa_count)) %>%
    
    # create the bar chart
    ggplot(mapping = aes(x = reorder(region_name, proportion), 
                         y = proportion,
                         fill = region_name
                         )
           ) +
      
    geom_col() +
    
    # apply region-colour mapping  
    scale_fill_manual(values = region_colours_vector) +
    
    guides(fill = FALSE) +
      
    labs(title = str_c("MSOAs receiving green space deprivation rating: ", rating),
         x = NULL,
         y = str_c("Proportion of the MSOAs that recieved a rating of ", rating)
         ) +
    
    coord_flip()
}


# produce one plot for each of the five green space deprivation ratings
gsdr_prop_by_region_plots <- c("A", "B", "C", "D", "E") %>% 
  map(~ plot_gsdr_prop_by_region(.x))

gsdr_prop_by_region_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

d_plot <- gsdr_prop_by_region_plots[[4]] + 
  ylim(0, 0.45) # ensure the plot axis scales match
  
e_plot <- gsdr_prop_by_region_plots[[5]] + 
  ylim(0, 0.45) # ensure the plot axis scales match

plot_grid(d_plot, e_plot, ncol = 2)

Could do ridgeline plots for each region - for prop rated A and proportion rated E

The geographic distribution of green space deprivation ratings

https://datacarpentry.org/r-raster-vector-geospatial/06-vector-open-shapefile-in-r/

library(sf)

msoa_sf <- st_read("../Data/Middle_Layer_Super_Output_Areas/Middle_Layer_Super_Output_Areas__December_2011__Boundaries_EW_BFE.shp")

# msoa_sf <- raster::shapefile("../Data/Middle_Layer_Super_Output_Areas/Middle_Layer_Super_Output_Areas__December_2011__Boundaries_EW_BFE.shp")



msoa_sf_gs <- msoa_sf %>% 
  left_join(green_space, by = c("MSOA11CD" = "MSOA_Code"))

A quick visual inspection of the MSOAs colored by their green space deprivation rating, shows a similiar patter across the regions (with the exception of London). With the the D and E ratings (oranges and reds) occurring in smaller (presumably more densely populated MSOAs) which make up urban areas. While the larger, more rural MSOAs tend to be less green space deprived, and have A or B ratings. Given the whole region of London would probably be considered a continuous urban space, it is unsurprising to observe many green space deprived MSOAs across the region/plot, with relatively few less green space deprived areas present.

green_space_region_plot <- function(region){

  p <-  ggplot(data = msoa_sf_gs %>% 
                 filter(region_name == region),
               mapping = aes(fill = Green_Space_Deprivation_Rating)
               )
  
  p + geom_sf(colour = "grey75") +
    
    scale_fill_brewer(type = "div",
                      palette = "RdYlGn",
                      direction = -1) +
    
    labs(title = str_c("...", region))
}

region_names <- green_space %>%
  select(region_name) %>% 
  distinct(region_name)

region_names %>%
  filter(!is.na(region_name)) %>% 
  pmap(~ green_space_region_plot(region = .x))

Ethnicity and green space deprivation

green_space <- green_space %>% 
  mutate(prop_BAME_pop = BAME_Pop / Total_Pop_From_Ethnicity_Data) %>%
  relocate(prop_BAME_pop, .after = BAME_Pop)

p <- ggplot(data = green_space,
            mapping = aes(x = Green_Space_Deprivation_Rating,
                          y = prop_BAME_pop)
            )

p + geom_jitter(alpha = 0.5, colour = "grey75") +
  geom_boxplot(mapping = aes(fill = Green_Space_Deprivation_Rating), 
               alpha = 0.5, 
               size = 1.25,
               colour = "grey25") +
  
  scale_fill_brewer(type = "div",
                      palette = "RdYlGn",
                      direction = -1) +
  
  guides(fill = FALSE) +
  
  coord_flip()

library(ggridges)

p <- ggplot(data = green_space,
            mapping = aes(y = Green_Space_Deprivation_Rating,
                          x = prop_BAME_pop,
                          fill = Green_Space_Deprivation_Rating)
            )

p + geom_density_ridges(alpha = 0.6, scale = 1.5) +
  scale_fill_brewer(type = "div",
                      palette = "RdYlGn",
                      direction = -1) 
## Picking joint bandwidth of 0.0272

Wealth and green space deprivation

Health and green space deprivation

Covid and green space deprivation